library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
# SET your path here to the covid19analysis folder.
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'
sg_hourly_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/weekly-patterns/v2/hourly/'
Load SFC and Alameda case data, and SCC case data.
# block groups
sf_blockgroups <-
block_groups("CA","San Francisco", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
ac_blockgroups <-
block_groups("CA","Alameda", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
sf_bgs <- sf_blockgroups %>% pull(GEOID)
ac_bgs <- ac_blockgroups %>% pull(GEOID)
# zip code areas
zctas_94 <-
zctas(cb = F, starts_with = "94") %>%
st_transform('+proj=longlat +datum=WGS84') %>%
dplyr::select(ZCTA5CE10, geometry)
zctas_95 <-
zctas(cb = F, starts_with = "95") %>%
st_transform('+proj=longlat +datum=WGS84') %>%
dplyr::select(ZCTA5CE10, geometry)
zctas_94_95 <- rbind(zctas_94, zctas_95)
# join with block groups
sf_bg_zctas <- sf_blockgroups %>%
st_centroid() %>%
st_join(zctas_94_95) %>%
dplyr::select(GEOID, ZCTA5CE10) %>%
rename(blockgroup = GEOID, zipcode = ZCTA5CE10)
ac_bg_zctas <- ac_blockgroups %>%
st_centroid() %>%
st_join(zctas_94_95) %>%
dplyr::select(GEOID, ZCTA5CE10) %>%
rename(blockgroup = GEOID, zipcode = ZCTA5CE10)
# get SF case data
sf_place_cases <-
read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv") %>%
filter(county == 'San Francisco')
# get population data for San Francisco
acs_vars = readRDS("/Users/simonespeizer/CEE 218Z/censusData2018_acs_acs5.rds")
# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
regionString <- paste0("state:06+county:", county)
censusData <- getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = regionString,
vars = variableToPull
) %>%
mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
return(censusData)
}
sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)
# getting population by zip code
sf_pop_zip <- pullCensus("B01003_001E", sf_fips) %>%
left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_pop_zip = sum(B01003_001E))
# join with cases and calculate cases per person
sf_cases_zip <- sf_place_cases %>% left_join(sf_pop_zip, by = c("place" = "zipcode")) %>%
mutate(cases_by_pop = confirmed_cases / total_pop_zip) %>%
rename(zipcode = place)
# get Alameda County case data - downloaded manually
ac_place_cases <- read.csv("Simone_Speizer/Alameda_County_Cumulative_Cases_By_Place_And_Zip.csv")
# handle the ones that are reported as <10 by calling them 5 cases. Note this is a simplification/choice I made and could be changed.
ac_cases_zip <- ac_place_cases %>%
rename(date = DtCreate) %>%
mutate(date = date %>% substr(1,10) %>% as.Date()) %>%
dplyr::select(c(date, contains("F9"))) %>% # only select zip code data
gather(key = "zipcode", value = "cases", -date) %>%
mutate(cases = ifelse(cases == "<10", "5", cases),
zipcode = zipcode %>% substr(2,6)) %>% # replace cases <10 with 10 and remove the "F" from zipcode names
mutate(cases = as.numeric(cases))
# get Alameda County populations by zip code
ac_fips <- fips("CA", "Alameda") %>% substr(3,5)
# calculate population by zip code
ac_pop_zip <- pullCensus("B01003_001E", ac_fips) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_pop_zip = sum(B01003_001E))
# join with cases
ac_cases_zip <- ac_cases_zip %>% left_join(ac_pop_zip) %>%
mutate(cases_by_pop = cases / total_pop_zip)
First pull the demographic information by zip code.
# block group populations
sf_pop_bg <- pullCensus("B01003_001E", sf_fips) %>%
rename(total_pop = B01003_001E)
ac_pop_bg <- pullCensus("B01003_001E", ac_fips) %>%
rename(total_pop = B01003_001E)
sf_avg_household_size <- pullCensus("B25010_001E", sf_fips) %>%
filter(B25010_001E > 0) %>% # some had negative values
left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
left_join(sf_pop_bg) %>%
group_by(zipcode) %>%
summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
filter(!is.na(zipcode))
# ALAMEDA
# average household size
ac_avg_household_size <- pullCensus("B25010_001E", ac_fips) %>%
filter(B25010_001E > 0) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
left_join(ac_pop_bg) %>%
group_by(zipcode) %>%
summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
filter(!is.na(zipcode))
# occupants per room
ac_occupants_zip <- pullCensus("group(B25014)", ac_fips) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>%
filter(!is.na(`occupants per room`)) %>%
group_by(blockgroup, `occupants per room`) %>%
summarize(estimate_tot = sum(estimate)) %>%
spread(key = `occupants per room`, value = estimate_tot) %>%
mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_0.5less = sum(`0.50 or less occupants per room`), total_0.5to1 = sum(`0.51 to 1.00 occupants per room`), total_1to1.5 = sum(`1.01 to 1.50 occupants per room`), total_1.5to2 = sum(`1.51 to 2.00 occupants per room`), total_2more = sum(`2.01 or more occupants per room`)) %>%
mutate(`percent more than 1 occupant` = (total_1to1.5 + total_1.5to2 + total_2more) * 100/ total_nums, `percent less than 1 occupant` = (100-`percent more than 1 occupant`), `percent 0.5 or less occupants` = total_0.5less*100/total_nums, `percent more than 0.5 occupants` = (100-`percent 0.5 or less occupants`), `percent more than 2 occupants` = total_2more*100/total_nums)
# population density
ac_pop_density_zip <- ac_cases_zip %>% filter(date == max(date)) %>%
dplyr::select(zipcode, total_pop_zip) %>%
left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>%
st_as_sf() %>%
mutate(zip_area = st_area(.), pop_density = total_pop_zip / zip_area) %>%
filter(!is.na(pop_density))
# bucketed household size
ac_house_size_zip <- pullCensus("group(B11016)", ac_fips) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>%
filter(!is.na(type)) %>%
filter(!is.na(size)) %>%
dplyr::select(-type) %>%
group_by(blockgroup, size) %>%
summarize(`total of this size` = sum(estimate)) %>%
spread(key = size, value = `total of this size`) %>%
mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_1 = sum(`1-person household`), total_2 = sum(`2-person household`), total_3 = sum(`3-person household`), total_4 = sum(`4-person household`), total_5 = sum(`5-person household`), total_6 = sum(`6-person household`), total_7more = sum(`7-or-more person household`)) %>%
mutate(`percent 5 or more` = (total_5 + total_6 + total_7more)* 100/ total_nums, `percent 1 or 2 only` = (total_1 + total_2)*100/total_nums)
# units in structure
ac_units_zip <- pullCensus("group(B25024)", ac_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "units"), sep = "!!") %>%
filter(!is.na(units)) %>%
spread(key = units, value = estimate) %>%
mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, total_20more = `20 to 49`+`50 or more`, total_10more = `10 to 19` + `20 to 49`+`50 or more`, total_1 = `1, attached` + `1, detached`) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_20more = sum(total_20more), total_10more = sum(total_20more), total_1_only = sum(total_1)) %>%
mutate(`percent 10 or more units` = total_10more*100/total_nums, `percent 20 or more units` = total_20more*100/total_nums, `percent 1 only` = total_1_only*100/total_nums, `percent more than 1 unit` = (100 - `percent 1 only`))
# income
ac_income_zip <- pullCensus("group(B19001)", ac_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "income"), sep = "!!") %>%
filter(!is.na(income)) %>%
spread(key = income, value = estimate) %>%
mutate(total_nums = `Less than $10,000` + `$10,000 to $14,999` + `$15,000 to $19,999` + `$20,000 to $24,999` + `$25,000 to $29,999` + `$30,000 to $34,999` + `$35,000 to $39,999` + `$40,000 to $44,999` + `$45,000 to $49,999` + `$50,000 to $59,999` + `$60,000 to $74,999` + `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_75000 = `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_100000 = `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_125000 = `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`) %>%
left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_over_75000 = sum(over_75000), total_over_100000 = sum(over_100000), total_over_125000 = sum(over_125000)) %>%
mutate(percent_over_75000 = total_over_75000*100 / total_nums,
percent_under_75000 = (100 - percent_over_75000),
percent_over_100000 = total_over_100000*100 / total_nums,
percent_under_100000 = (100 - percent_over_100000),
percent_over_125000 = total_over_125000*100 / total_nums,
percent_under_125000 = (100 - percent_over_125000))
ac_curr_cases_dem <- left_join(ac_cases_zip %>% filter(date == max(date)), ac_avg_household_size) %>% left_join(ac_pop_density_zip %>% dplyr::select(pop_density, zipcode)) %>% left_join(ac_occupants_zip %>% dplyr::select(`percent more than 1 occupant`, `percent more than 0.5 occupants`, `percent more than 2 occupants`, zipcode)) %>% left_join(ac_units_zip %>% dplyr::select(`percent 10 or more units`, `percent more than 1 unit`, zipcode)) %>% left_join(ac_income_zip %>% dplyr::select(percent_under_75000, percent_under_100000, percent_under_125000, zipcode)) %>% as.data.frame() %>%
filter(!is.na(zipcode) & !is.na(avg_household_size)) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
# SANTA CLARA
# block groups
scc_blockgroups <-
block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
st_transform('+proj=longlat +datum=WGS84')
scc_bgs <- scc_blockgroups %>% pull(GEOID)
# join with zip codes
scc_bg_zctas <- scc_blockgroups %>%
st_centroid() %>%
st_join(zctas_94_95) %>%
dplyr::select(GEOID, ZCTA5CE10) %>%
rename(blockgroup = GEOID, zipcode = ZCTA5CE10)
# get case data
scc_map_cases <- esri2sf("https://services2.arcgis.com/RiZWfy7B1r76pKTz/arcgis/rest/services/COVID_zip_FC/FeatureServer/0")
## [1] "Feature Layer"
## [1] "esriGeometryPolygon"
scc_cases_zip <- scc_map_cases %>%
dplyr::select(zipcode, Cases, Population) %>%
st_drop_geometry() %>%
rename(cases = Cases, pop = Population) %>%
mutate(cases = replace_na(cases, 0)) %>%
mutate(cases_by_pop = cases/pop) %>%
filter(is.numeric(cases_by_pop))
# get population data
scc_fips <- fips("CA", "Santa Clara") %>% substr(3,5)
scc_pop_zip <- pullCensus("B01003_001E", scc_fips) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_pop_zip = sum(B01003_001E))
scc_pop_bg <- pullCensus("B01003_001E", scc_fips) %>%
rename(total_pop = B01003_001E)
# average household size
scc_avg_household_size <- pullCensus("B25010_001E", scc_fips) %>%
filter(B25010_001E > 0) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
left_join(scc_pop_bg) %>%
group_by(zipcode) %>%
summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
filter(!is.na(zipcode))
# occupants per room
scc_occupants_zip <- pullCensus("group(B25014)", scc_fips) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>%
filter(!is.na(`occupants per room`)) %>%
group_by(blockgroup, `occupants per room`) %>%
summarize(estimate_tot = sum(estimate)) %>%
spread(key = `occupants per room`, value = estimate_tot) %>%
mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_0.5less = sum(`0.50 or less occupants per room`), total_0.5to1 = sum(`0.51 to 1.00 occupants per room`), total_1to1.5 = sum(`1.01 to 1.50 occupants per room`), total_1.5to2 = sum(`1.51 to 2.00 occupants per room`), total_2more = sum(`2.01 or more occupants per room`)) %>%
mutate(`percent more than 1 occupant` = (total_1to1.5 + total_1.5to2 + total_2more) * 100/ total_nums, `percent less than 1 occupant` = (100-`percent more than 1 occupant`), `percent 0.5 or less occupants` = total_0.5less*100/total_nums, `percent more than 0.5 occupants` = (100-`percent 0.5 or less occupants`), `percent more than 2 occupants` = total_2more*100/total_nums)
# population density
scc_pop_density_zip <- scc_cases_zip %>%
dplyr::select(zipcode, pop) %>%
rename(total_pop_zip = pop) %>%
left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>%
st_as_sf() %>%
mutate(zip_area = st_area(.), pop_density = total_pop_zip / zip_area) %>%
filter(!is.na(pop_density))
# bucketed household size
scc_house_size_zip <- pullCensus("group(B11016)", scc_fips) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>%
filter(!is.na(type)) %>%
filter(!is.na(size)) %>%
dplyr::select(-type) %>%
group_by(blockgroup, size) %>%
summarize(`total of this size` = sum(estimate)) %>%
spread(key = size, value = `total of this size`) %>%
mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_1 = sum(`1-person household`), total_2 = sum(`2-person household`), total_3 = sum(`3-person household`), total_4 = sum(`4-person household`), total_5 = sum(`5-person household`), total_6 = sum(`6-person household`), total_7more = sum(`7-or-more person household`)) %>%
mutate(`percent 5 or more` = (total_5 + total_6 + total_7more)* 100/ total_nums, `percent 1 or 2 only` = (total_1 + total_2)*100/total_nums)
# units in structure
scc_units_zip <- pullCensus("group(B25024)", scc_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "units"), sep = "!!") %>%
filter(!is.na(units)) %>%
spread(key = units, value = estimate) %>%
mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, total_20more = `20 to 49`+`50 or more`, total_10more = `10 to 19` + `20 to 49`+`50 or more`, total_1 = `1, attached` + `1, detached`) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_20more = sum(total_20more), total_10more = sum(total_20more), total_1_only = sum(total_1)) %>%
mutate(`percent 10 or more units` = total_10more*100/total_nums, `percent 20 or more units` = total_20more*100/total_nums, `percent 1 only` = total_1_only*100/total_nums, `percent more than 1 unit` = (100 - `percent 1 only`))
# income
scc_income_zip <- pullCensus("group(B19001)", scc_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
dplyr::select(-variable) %>%
separate(label, into = c(NA, NA, "income"), sep = "!!") %>%
filter(!is.na(income)) %>%
spread(key = income, value = estimate) %>%
mutate(total_nums = `Less than $10,000` + `$10,000 to $14,999` + `$15,000 to $19,999` + `$20,000 to $24,999` + `$25,000 to $29,999` + `$30,000 to $34,999` + `$35,000 to $39,999` + `$40,000 to $44,999` + `$45,000 to $49,999` + `$50,000 to $59,999` + `$60,000 to $74,999` + `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_75000 = `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_100000 = `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_125000 = `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`) %>%
left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>%
group_by(zipcode) %>%
summarize(total_nums = sum(total_nums), total_over_75000 = sum(over_75000), total_over_100000 = sum(over_100000), total_over_125000 = sum(over_125000)) %>%
mutate(percent_over_75000 = total_over_75000*100 / total_nums,
percent_under_75000 = (100 - percent_over_75000),
percent_over_100000 = total_over_100000*100 / total_nums,
percent_under_100000 = (100 - percent_over_100000),
percent_over_125000 = total_over_125000*100 / total_nums,
percent_under_125000 = (100 - percent_over_125000))
scc_curr_cases_dem <- left_join(scc_cases_zip, scc_avg_household_size) %>% left_join(scc_pop_density_zip %>% dplyr::select(pop_density, zipcode)) %>% left_join(scc_occupants_zip %>% dplyr::select(`percent more than 1 occupant`, `percent more than 0.5 occupants`, `percent more than 2 occupants`, zipcode)) %>% left_join(scc_units_zip %>% dplyr::select(`percent 10 or more units`, `percent more than 1 unit`, zipcode)) %>% left_join(scc_income_zip %>% dplyr::select(percent_under_75000, percent_under_100000, percent_under_125000, zipcode)) %>% as.data.frame() %>%
filter(!is.na(zipcode) & !is.na(avg_household_size)) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
For all of these, I show the correlation with cases per capita and correlation with log of cases per capita.
Household size.
# Alameda
fig_ac_cases_hhs <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~avg_household_size, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~avg_household_size, y = fitted(lm(cases_by_pop~ avg_household_size, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Household size, population weighted average'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_hhs
ac_cases_hhs_model <- lm(cases_by_pop~ avg_household_size, ac_curr_cases_dem)
summary(ac_cases_hhs_model)
##
## Call:
## lm(formula = cases_by_pop ~ avg_household_size, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0020362 -0.0010094 -0.0001468 0.0007284 0.0049034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0032338 0.0013476 -2.400 0.020707 *
## avg_household_size 0.0017964 0.0004724 3.803 0.000438 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001446 on 44 degrees of freedom
## Multiple R-squared: 0.2473, Adjusted R-squared: 0.2302
## F-statistic: 14.46 on 1 and 44 DF, p-value: 0.0004376
# also do with log of cases
ac_cases_hhs_model_log <- lm(log_cases_by_pop~ avg_household_size, ac_curr_cases_dem)
summary(ac_cases_hhs_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ avg_household_size, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.25329 -0.64851 0.06326 0.66328 1.49219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.9684 0.6862 -13.069 < 2e-16 ***
## avg_household_size 0.8257 0.2406 3.432 0.00131 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7366 on 44 degrees of freedom
## Multiple R-squared: 0.2112, Adjusted R-squared: 0.1932
## F-statistic: 11.78 on 1 and 44 DF, p-value: 0.001315
# SCC
fig_scc_cases_hhs <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~avg_household_size, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~avg_household_size, y = fitted(lm(cases_by_pop~ avg_household_size, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Household size, population weighted average'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_hhs
scc_cases_hhs_model <- lm(cases_by_pop~ avg_household_size, scc_curr_cases_dem)
summary(scc_cases_hhs_model)
##
## Call:
## lm(formula = cases_by_pop ~ avg_household_size, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0017069 -0.0004330 -0.0000825 0.0001695 0.0038324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0007294 0.0006548 -1.114 0.27031
## avg_household_size 0.0006462 0.0002144 3.015 0.00394 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000879 on 53 degrees of freedom
## Multiple R-squared: 0.1464, Adjusted R-squared: 0.1303
## F-statistic: 9.088 on 1 and 53 DF, p-value: 0.003943
# log
scc_cases_hhs_model_log <- lm(log_cases_by_pop~ avg_household_size, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_hhs_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ avg_household_size, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96137 -0.32319 -0.00645 0.17926 1.81371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.1615 0.4201 -19.427 < 2e-16 ***
## avg_household_size 0.4700 0.1366 3.439 0.00125 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5049 on 46 degrees of freedom
## Multiple R-squared: 0.2046, Adjusted R-squared: 0.1873
## F-statistic: 11.83 on 1 and 46 DF, p-value: 0.00125
Population density
# Alameda
fig_ac_cases_pop_dens <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~pop_density, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~pop_density, y = fitted(lm(cases_by_pop~ pop_density, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Population density'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_pop_dens
ac_cases_pop_dens_model <- lm(cases_by_pop~ pop_density, ac_curr_cases_dem)
summary(ac_cases_pop_dens_model)
##
## Call:
## lm(formula = cases_by_pop ~ pop_density, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0017806 -0.0011573 -0.0005960 0.0003101 0.0058180
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0017236 0.0003775 4.566 3.99e-05 ***
## pop_density 0.0376935 0.1057325 0.356 0.723
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001665 on 44 degrees of freedom
## Multiple R-squared: 0.00288, Adjusted R-squared: -0.01978
## F-statistic: 0.1271 on 1 and 44 DF, p-value: 0.7232
ac_cases_pop_dens_model_log <- lm(log_cases_by_pop~ pop_density, ac_curr_cases_dem)
summary(ac_cases_pop_dens_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ pop_density, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26522 -0.70930 0.05947 0.46639 1.80817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.624 0.188 -35.233 <2e-16 ***
## pop_density -6.977 52.659 -0.132 0.895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8292 on 44 degrees of freedom
## Multiple R-squared: 0.0003988, Adjusted R-squared: -0.02232
## F-statistic: 0.01755 on 1 and 44 DF, p-value: 0.8952
# SCC
fig_scc_cases_pop_dens <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~pop_density, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~pop_density, y = fitted(lm(cases_by_pop~ pop_density, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Population density'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_pop_dens
scc_cases_pop_dens_model <- lm(cases_by_pop~ pop_density, scc_curr_cases_dem)
summary(scc_cases_pop_dens_model)
##
## Call:
## lm(formula = cases_by_pop ~ pop_density, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0018121 -0.0005262 -0.0000675 0.0003253 0.0033551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0009785 0.0002150 4.551 3.16e-05 ***
## pop_density 0.1002471 0.0747943 1.340 0.186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009357 on 53 degrees of freedom
## Multiple R-squared: 0.03278, Adjusted R-squared: 0.01453
## F-statistic: 1.796 on 1 and 53 DF, p-value: 0.1859
scc_cases_pop_dens_model_log <- lm(log_cases_by_pop~ pop_density, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_pop_dens_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ pop_density, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11800 -0.43629 0.02966 0.30559 1.48172
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9372 0.1489 -46.592 <2e-16 ***
## pop_density 84.0741 53.2314 1.579 0.121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5513 on 46 degrees of freedom
## Multiple R-squared: 0.05144, Adjusted R-squared: 0.03082
## F-statistic: 2.495 on 1 and 46 DF, p-value: 0.1211
Occupants per room.
# Alameda
fig_ac_cases_occ_1 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~`percent more than 1 occupant`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 1 occupant`, y = fitted(lm(cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 1 occupant per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_occ_1
ac_cases_occ_1_model <- lm(cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)
summary(ac_cases_occ_1_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 occupant`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0022719 -0.0005746 -0.0000026 0.0005459 0.0035099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.384e-04 2.690e-04 0.515 0.609
## `percent more than 1 occupant` 2.459e-04 3.157e-05 7.789 8.13e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001081 on 44 degrees of freedom
## Multiple R-squared: 0.5796, Adjusted R-squared: 0.5701
## F-statistic: 60.66 on 1 and 44 DF, p-value: 8.127e-10
ac_cases_occ_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)
summary(ac_cases_occ_1_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 occupant`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.64926 -0.35425 0.07074 0.28947 1.51698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.40116 0.14979 -49.410 < 2e-16 ***
## `percent more than 1 occupant` 0.11048 0.01758 6.284 1.29e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6021 on 44 degrees of freedom
## Multiple R-squared: 0.473, Adjusted R-squared: 0.461
## F-statistic: 39.49 on 1 and 44 DF, p-value: 1.288e-07
# SCC
fig_scc_cases_occ_1 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~`percent more than 1 occupant`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 1 occupant`, y = fitted(lm(cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 1 occupant per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_occ_1
scc_cases_occ_1_model <- lm(cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem)
summary(scc_cases_occ_1_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 occupant`, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0015011 -0.0005152 -0.0000740 0.0003815 0.0033006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.879e-04 1.808e-04 2.699 0.00932 **
## `percent more than 1 occupant` 9.740e-05 1.969e-05 4.947 8.01e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007869 on 53 degrees of freedom
## Multiple R-squared: 0.3159, Adjusted R-squared: 0.3029
## F-statistic: 24.47 on 1 and 53 DF, p-value: 8.013e-06
scc_cases_occ_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_occ_1_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 occupant`,
## data = scc_curr_cases_dem %>% filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10401 -0.22420 -0.01544 0.29984 1.40296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.12825 0.12395 -57.509 < 2e-16 ***
## `percent more than 1 occupant` 0.04958 0.01291 3.839 0.000375 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4926 on 46 degrees of freedom
## Multiple R-squared: 0.2427, Adjusted R-squared: 0.2262
## F-statistic: 14.74 on 1 and 46 DF, p-value: 0.0003752
That is very high! Try with more than 0.5 occupants.
# Alameda
fig_ac_cases_occ_0.5 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~`percent more than 0.5 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 0.5 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 0.5 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_occ_0.5
ac_cases_occ_0.5_model <- lm(cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)
summary(ac_cases_occ_0.5_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 0.5 occupants`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0024300 -0.0009461 -0.0001091 0.0004828 0.0046568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.101e-03 7.628e-04 -1.443 0.156185
## `percent more than 0.5 occupants` 7.081e-05 1.774e-05 3.991 0.000245 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001429 on 44 degrees of freedom
## Multiple R-squared: 0.2658, Adjusted R-squared: 0.2491
## F-statistic: 15.93 on 1 and 44 DF, p-value: 0.0002454
ac_cases_occ_0.5_model_log <- lm(log_cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)
summary(ac_cases_occ_0.5_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 0.5 occupants`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7782 -0.4802 0.0876 0.4686 1.6359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.133669 0.376065 -21.628 < 2e-16 ***
## `percent more than 0.5 occupants` 0.036073 0.008746 4.124 0.000162 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7043 on 44 degrees of freedom
## Multiple R-squared: 0.2788, Adjusted R-squared: 0.2624
## F-statistic: 17.01 on 1 and 44 DF, p-value: 0.0001623
# SCC
fig_scc_cases_occ_0.5 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~`percent more than 0.5 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 0.5 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 0.5 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_occ_0.5
scc_cases_occ_0.5_model <- lm(cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem)
summary(scc_cases_occ_0.5_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 0.5 occupants`,
## data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0018484 -0.0004710 -0.0000728 0.0003707 0.0031909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.822e-04 4.845e-04 -0.582 0.56275
## `percent more than 0.5 occupants` 3.315e-05 1.043e-05 3.178 0.00247 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008719 on 53 degrees of freedom
## Multiple R-squared: 0.1601, Adjusted R-squared: 0.1442
## F-statistic: 10.1 on 1 and 53 DF, p-value: 0.002472
scc_cases_occ_0.5_model_log <- lm(log_cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_occ_0.5_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 0.5 occupants`,
## data = scc_curr_cases_dem %>% filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15948 -0.26884 0.04851 0.29399 1.33667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.671811 0.311448 -24.633 < 2e-16 ***
## `percent more than 0.5 occupants` 0.020587 0.006671 3.086 0.00343 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5152 on 46 degrees of freedom
## Multiple R-squared: 0.1715, Adjusted R-squared: 0.1535
## F-statistic: 9.524 on 1 and 46 DF, p-value: 0.003428
Try 2 or more.
# Alameda
fig_ac_cases_occ_2 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~`percent more than 2 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 2 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 2 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_occ_2
ac_cases_occ_2_model <- lm(cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)
summary(ac_cases_occ_2_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 2 occupants`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0021828 -0.0006838 -0.0003256 0.0006245 0.0033987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0009738 0.0002440 3.991 0.000246 ***
## `percent more than 2 occupants` 0.0014473 0.0002622 5.520 1.7e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001282 on 44 degrees of freedom
## Multiple R-squared: 0.4092, Adjusted R-squared: 0.3957
## F-statistic: 30.47 on 1 and 44 DF, p-value: 1.703e-06
ac_cases_occ_2_model_log <- lm(log_cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)
summary(ac_cases_occ_2_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 2 occupants`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39455 -0.53594 -0.07895 0.55651 1.45703
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9985 0.1332 -52.53 < 2e-16 ***
## `percent more than 2 occupants` 0.6041 0.1432 4.22 0.00012 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6998 on 44 degrees of freedom
## Multiple R-squared: 0.2881, Adjusted R-squared: 0.2719
## F-statistic: 17.8 on 1 and 44 DF, p-value: 0.0001204
# SCC
fig_scc_cases_occ_2 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~`percent more than 2 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 2 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 2 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_occ_2
scc_cases_occ_2_model <- lm(cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem)
summary(scc_cases_occ_2_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 2 occupants`,
## data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0019458 -0.0004797 0.0000242 0.0004582 0.0031705
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0008980 0.0001737 5.169 3.65e-06 ***
## `percent more than 2 occupants` 0.0004990 0.0001978 2.522 0.0147 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000899 on 53 degrees of freedom
## Multiple R-squared: 0.1072, Adjusted R-squared: 0.09033
## F-statistic: 6.362 on 1 and 53 DF, p-value: 0.0147
scc_cases_occ_2_model_log <- lm(log_cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_occ_2_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 2 occupants`,
## data = scc_curr_cases_dem %>% filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1767 -0.2703 0.0544 0.3493 1.3162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9552 0.1108 -62.754 <2e-16 ***
## `percent more than 2 occupants` 0.3513 0.1308 2.686 0.01 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5263 on 46 degrees of freedom
## Multiple R-squared: 0.1356, Adjusted R-squared: 0.1168
## F-statistic: 7.216 on 1 and 46 DF, p-value: 0.01002
Units in structure.
# Alameda
fig_ac_cases_units_10 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~`percent 10 or more units`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent 10 or more units`, y = fitted(lm(cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent 10 or more units per structure'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_units_10
ac_cases_units_10_model <- lm(cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)
summary(ac_cases_units_10_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent 10 or more units`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0014979 -0.0011743 -0.0004578 0.0002407 0.0059375
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.900e-03 3.690e-04 5.15 5.87e-06 ***
## `percent 10 or more units` -4.902e-06 1.814e-05 -0.27 0.788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001666 on 44 degrees of freedom
## Multiple R-squared: 0.001656, Adjusted R-squared: -0.02103
## F-statistic: 0.07298 on 1 and 44 DF, p-value: 0.7883
ac_cases_units_10_model_log <- lm(log_cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)
summary(ac_cases_units_10_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent 10 or more units`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.33953 -0.71563 -0.00642 0.48547 1.78699
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.671549 0.183603 -36.337 <2e-16 ***
## `percent 10 or more units` 0.001887 0.009028 0.209 0.835
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8289 on 44 degrees of freedom
## Multiple R-squared: 0.0009919, Adjusted R-squared: -0.02171
## F-statistic: 0.04369 on 1 and 44 DF, p-value: 0.8354
# SCC
fig_scc_cases_units_10 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~`percent 10 or more units`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent 10 or more units`, y = fitted(lm(cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent 10 or more units per structure'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_units_10
scc_cases_units_10_model <- lm(cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem)
summary(scc_cases_units_10_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent 10 or more units`, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.411e-03 -5.609e-04 -9.125e-05 3.514e-04 3.037e-03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.157e-03 1.827e-04 6.331 5.36e-08 ***
## `percent 10 or more units` 3.089e-06 7.282e-06 0.424 0.673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009498 on 53 degrees of freedom
## Multiple R-squared: 0.003383, Adjusted R-squared: -0.01542
## F-statistic: 0.1799 on 1 and 53 DF, p-value: 0.6732
scc_cases_units_10_model_log <- lm(log_cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_units_10_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent 10 or more units`, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22681 -0.36831 -0.03308 0.29044 1.27530
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.772782 0.123451 -54.862 <2e-16 ***
## `percent 10 or more units` 0.002021 0.005452 0.371 0.713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5652 on 46 degrees of freedom
## Multiple R-squared: 0.002978, Adjusted R-squared: -0.0187
## F-statistic: 0.1374 on 1 and 46 DF, p-value: 0.7126
Try with more than one unit.
# Alameda
fig_ac_cases_units_1 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~`percent more than 1 unit`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 1 unit`, y = fitted(lm(cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 1 unit per structure'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_units_1
ac_cases_units_1_model <- lm(cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)
summary(ac_cases_units_1_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 unit`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0017272 -0.0011122 -0.0006374 0.0003547 0.0058540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.623e-03 4.899e-04 3.314 0.00185 **
## `percent more than 1 unit` 5.257e-06 1.101e-05 0.477 0.63548
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001663 on 44 degrees of freedom
## Multiple R-squared: 0.005152, Adjusted R-squared: -0.01746
## F-statistic: 0.2279 on 1 and 44 DF, p-value: 0.6355
ac_cases_units_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)
summary(ac_cases_units_1_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 unit`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.42109 -0.70629 -0.04902 0.51872 1.73989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.746749 0.243625 -27.693 <2e-16 ***
## `percent more than 1 unit` 0.002697 0.005477 0.492 0.625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8271 on 44 degrees of freedom
## Multiple R-squared: 0.005479, Adjusted R-squared: -0.01712
## F-statistic: 0.2424 on 1 and 44 DF, p-value: 0.6249
# SCC
fig_scc_cases_units_1 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~`percent more than 1 unit`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~`percent more than 1 unit`, y = fitted(lm(cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent more than 1 unit per structure'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_units_1
scc_cases_units_1_model <- lm(cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem)
summary(scc_cases_units_1_model)
##
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 unit`, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0014604 -0.0005129 -0.0001351 0.0003362 0.0030268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.053e-03 2.422e-04 4.347 6.29e-05 ***
## `percent more than 1 unit` 4.199e-06 5.437e-06 0.772 0.443
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009461 on 53 degrees of freedom
## Multiple R-squared: 0.01113, Adjusted R-squared: -0.007532
## F-statistic: 0.5963 on 1 and 53 DF, p-value: 0.4434
scc_cases_units_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_units_1_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 unit`, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24974 -0.35563 -0.02577 0.29671 1.27341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.795550 0.162824 -41.736 <2e-16 ***
## `percent more than 1 unit` 0.001513 0.003734 0.405 0.687
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5651 on 46 degrees of freedom
## Multiple R-squared: 0.003558, Adjusted R-squared: -0.0181
## F-statistic: 0.1642 on 1 and 46 DF, p-value: 0.6872
Not significant.
Try income.
# Alameda
fig_ac_cases_inc_75 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~percent_under_75000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_75000, y = fitted(lm(cases_by_pop~ percent_under_75000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $75,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_inc_75
ac_cases_inc_75_model <- lm(cases_by_pop~ percent_under_75000, ac_curr_cases_dem)
summary(ac_cases_inc_75_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_75000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0033557 -0.0007076 -0.0001242 0.0004247 0.0042952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.313e-04 5.556e-04 -1.136 0.262
## percent_under_75000 5.909e-05 1.247e-05 4.740 2.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001357 on 44 degrees of freedom
## Multiple R-squared: 0.338, Adjusted R-squared: 0.323
## F-statistic: 22.47 on 1 and 44 DF, p-value: 2.265e-05
ac_cases_inc_75_model_log <- lm(log_cases_by_pop~ percent_under_75000, ac_curr_cases_dem)
summary(ac_cases_inc_75_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_75000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.19537 -0.35435 0.03465 0.46874 1.20218
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.823860 0.281026 -27.840 < 2e-16 ***
## percent_under_75000 0.028401 0.006305 4.504 4.86e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6861 on 44 degrees of freedom
## Multiple R-squared: 0.3156, Adjusted R-squared: 0.3
## F-statistic: 20.29 on 1 and 44 DF, p-value: 4.861e-05
# SCC
fig_scc_cases_inc_75 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~percent_under_75000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_75000, y = fitted(lm(cases_by_pop~ percent_under_75000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $75,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_inc_75
scc_cases_inc_75_model <- lm(cases_by_pop~ percent_under_75000, scc_curr_cases_dem)
summary(scc_cases_inc_75_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_75000, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0020102 -0.0004037 -0.0001126 0.0002820 0.0032014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.182e-05 4.095e-04 0.029 0.97707
## percent_under_75000 3.639e-05 1.189e-05 3.061 0.00346 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000877 on 53 degrees of freedom
## Multiple R-squared: 0.1502, Adjusted R-squared: 0.1342
## F-statistic: 9.37 on 1 and 53 DF, p-value: 0.003459
scc_cases_inc_75_model_log <- lm(log_cases_by_pop~ percent_under_75000, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_inc_75_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_75000, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11075 -0.33640 0.02249 0.28161 1.34211
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.691623 0.244837 -31.415 < 2e-16 ***
## percent_under_75000 0.029067 0.007154 4.063 0.000187 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4856 on 46 degrees of freedom
## Multiple R-squared: 0.2641, Adjusted R-squared: 0.2481
## F-statistic: 16.51 on 1 and 46 DF, p-value: 0.0001869
Income as 100,000.
# Alameda
fig_ac_cases_inc_100 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~percent_under_100000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_100000, y = fitted(lm(cases_by_pop~ percent_under_100000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $100,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_inc_100
ac_cases_inc_100_model <- lm(cases_by_pop~ percent_under_100000, ac_curr_cases_dem)
summary(ac_cases_inc_100_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0030285 -0.0006396 -0.0001944 0.0004365 0.0041642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.465e-03 6.486e-04 -2.259 0.0289 *
## percent_under_100000 6.226e-05 1.172e-05 5.311 3.43e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001302 on 44 degrees of freedom
## Multiple R-squared: 0.3907, Adjusted R-squared: 0.3768
## F-statistic: 28.21 on 1 and 44 DF, p-value: 3.428e-06
ac_cases_inc_100_model_log <- lm(log_cases_by_pop~ percent_under_100000, ac_curr_cases_dem)
summary(ac_cases_inc_100_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_100000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.06459 -0.32849 0.03041 0.40543 1.15621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.28040 0.32257 -25.670 < 2e-16 ***
## percent_under_100000 0.03098 0.00583 5.314 3.39e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6472 on 44 degrees of freedom
## Multiple R-squared: 0.3909, Adjusted R-squared: 0.3771
## F-statistic: 28.24 on 1 and 44 DF, p-value: 3.393e-06
# SCC
fig_scc_cases_inc_100 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~percent_under_100000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_100000, y = fitted(lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $100,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_inc_100
scc_cases_inc_100_model <- lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem)
summary(scc_cases_inc_100_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.827e-03 -4.416e-04 -5.580e-06 2.418e-04 3.129e-03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.287e-05 4.555e-04 -0.204 0.8392
## percent_under_100000 3.011e-05 1.015e-05 2.967 0.0045 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000881 on 53 degrees of freedom
## Multiple R-squared: 0.1425, Adjusted R-squared: 0.1263
## F-statistic: 8.804 on 1 and 53 DF, p-value: 0.004501
scc_cases_inc_100_model_log <- lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_inc_100_model_log)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0010976 -0.0004645 -0.0001279 0.0001819 0.0029335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.893e-04 4.195e-04 -0.690 0.493846
## percent_under_100000 3.906e-05 9.428e-06 4.143 0.000145 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000757 on 46 degrees of freedom
## Multiple R-squared: 0.2717, Adjusted R-squared: 0.2559
## F-statistic: 17.16 on 1 and 46 DF, p-value: 0.0001453
Income as 125,000.
# Alameda
fig_ac_cases_inc_125 <- plot_ly(ac_curr_cases_dem) %>%
add_trace(x = ~percent_under_125000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_125000, y = fitted(lm(cases_by_pop~ percent_under_125000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $125,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")
fig_ac_cases_inc_125
ac_cases_inc_125_model <- lm(cases_by_pop~ percent_under_125000, ac_curr_cases_dem)
summary(ac_cases_inc_125_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0028161 -0.0007231 -0.0002092 0.0004190 0.0043649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.277e-03 7.816e-04 -2.913 0.00561 **
## percent_under_125000 6.556e-05 1.212e-05 5.412 2.45e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001292 on 44 degrees of freedom
## Multiple R-squared: 0.3996, Adjusted R-squared: 0.386
## F-statistic: 29.29 on 1 and 44 DF, p-value: 2.451e-06
ac_cases_inc_125_model_log <- lm(log_cases_by_pop~ percent_under_125000, ac_curr_cases_dem)
summary(ac_cases_inc_125_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98666 -0.32432 0.01612 0.37468 1.22150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.768576 0.377622 -23.221 < 2e-16 ***
## percent_under_125000 0.033972 0.005853 5.804 6.54e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6242 on 44 degrees of freedom
## Multiple R-squared: 0.4336, Adjusted R-squared: 0.4208
## F-statistic: 33.69 on 1 and 44 DF, p-value: 6.544e-07
# SCC
fig_scc_cases_inc_125 <- plot_ly(scc_curr_cases_dem) %>%
add_trace(x = ~percent_under_125000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~percent_under_125000, y = fitted(lm(cases_by_pop~ percent_under_125000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
layout(xaxis = list(title = 'Percent of households earning less than $125,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")
fig_scc_cases_inc_125
scc_cases_inc_125_model <- lm(cases_by_pop~ percent_under_125000, scc_curr_cases_dem)
summary(scc_cases_inc_125_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0016302 -0.0004194 -0.0000520 0.0002166 0.0031734
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.116e-04 5.140e-04 -0.606 0.54695
## percent_under_125000 2.898e-05 9.514e-06 3.046 0.00361 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008777 on 53 degrees of freedom
## Multiple R-squared: 0.149, Adjusted R-squared: 0.1329
## F-statistic: 9.277 on 1 and 53 DF, p-value: 0.003611
scc_cases_inc_125_model_log <- lm(log_cases_by_pop~ percent_under_125000, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_inc_125_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.16009 -0.34031 -0.01414 0.23537 1.31855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.957829 0.300718 -26.463 < 2e-16 ***
## percent_under_125000 0.023324 0.005596 4.168 0.000134 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4823 on 46 degrees of freedom
## Multiple R-squared: 0.2741, Adjusted R-squared: 0.2584
## F-statistic: 17.37 on 1 and 46 DF, p-value: 0.0001341
The higher thresholds are more effective for Alameda, SCC is about the same for 75,000 and 125,000. So overall, 125,000 seems best.
Try multiple regression analysis with these.
ac_cases_dem_model <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, ac_curr_cases_dem)
summary(ac_cases_dem_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0015848 -0.0004709 -0.0001101 0.0004055 0.0028513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.735e-03 2.665e-03 1.026 0.3109
## percent_under_125000 5.800e-05 1.662e-05 3.489 0.0012 **
## avg_household_size -1.568e-03 8.867e-04 -1.768 0.0847 .
## pop_density -1.091e-01 8.699e-02 -1.254 0.2171
## `percent more than 1 occupant` 2.626e-04 7.589e-05 3.460 0.0013 **
## `percent more than 1 unit` -4.230e-05 1.584e-05 -2.670 0.0109 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009469 on 40 degrees of freedom
## Multiple R-squared: 0.7068, Adjusted R-squared: 0.6702
## F-statistic: 19.29 on 5 and 40 DF, p-value: 9.935e-10
scc_cases_dem_model <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, scc_curr_cases_dem)
summary(scc_cases_dem_model)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit`,
## data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0016646 -0.0005053 -0.0000357 0.0003541 0.0034148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.056e-04 1.719e-03 -0.120 0.905
## percent_under_125000 -4.247e-06 1.341e-05 -0.317 0.753
## avg_household_size 3.082e-04 5.482e-04 0.562 0.577
## pop_density -1.075e-02 7.314e-02 -0.147 0.884
## `percent more than 1 occupant` 8.505e-05 5.151e-05 1.651 0.105
## `percent more than 1 unit` 2.851e-06 1.149e-05 0.248 0.805
##
## Residual standard error: 0.0008112 on 49 degrees of freedom
## Multiple R-squared: 0.3278, Adjusted R-squared: 0.2592
## F-statistic: 4.779 on 5 and 49 DF, p-value: 0.001232
Hmm, that’s interesting. Also with log of cases.
ac_cases_dem_model_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, ac_curr_cases_dem)
summary(ac_cases_dem_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit`,
## data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84684 -0.29885 -0.07084 0.27367 1.04048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.611e+00 1.315e+00 -5.786 9.45e-07 ***
## percent_under_125000 4.318e-02 8.205e-03 5.263 5.11e-06 ***
## avg_household_size -4.548e-01 4.376e-01 -1.039 0.30496
## pop_density -1.175e+02 4.293e+01 -2.738 0.00918 **
## `percent more than 1 occupant` 7.064e-02 3.746e-02 1.886 0.06657 .
## `percent more than 1 unit` -1.607e-02 7.818e-03 -2.055 0.04643 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4673 on 40 degrees of freedom
## Multiple R-squared: 0.7114, Adjusted R-squared: 0.6753
## F-statistic: 19.72 on 5 and 40 DF, p-value: 7.33e-10
scc_cases_dem_model_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_dem_model_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit`,
## data = scc_curr_cases_dem %>% filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.08976 -0.25029 -0.01391 0.26471 1.37277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.447739 1.129114 -8.367 1.73e-10 ***
## percent_under_125000 0.016489 0.009430 1.748 0.0877 .
## avg_household_size 0.581402 0.356598 1.630 0.1105
## pop_density -29.946703 58.962583 -0.508 0.6142
## `percent more than 1 occupant` -0.018551 0.034780 -0.533 0.5966
## `percent more than 1 unit` 0.008039 0.007354 1.093 0.2806
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4752 on 42 degrees of freedom
## Multiple R-squared: 0.3566, Adjusted R-squared: 0.28
## F-statistic: 4.656 on 5 and 42 DF, p-value: 0.001801
Just include the variables that were relevant before.
ac_cases_dem_model_2 <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, ac_curr_cases_dem)
summary(ac_cases_dem_model_2)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0024512 -0.0006169 -0.0000824 0.0004135 0.0033460
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.546e-03 1.976e-03 -1.794 0.0799 .
## percent_under_125000 3.628e-05 1.702e-05 2.131 0.0390 *
## avg_household_size 8.022e-04 5.582e-04 1.437 0.1581
## `percent more than 1 occupant` 1.227e-04 6.832e-05 1.796 0.0796 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.001051 on 42 degrees of freedom
## Multiple R-squared: 0.6209, Adjusted R-squared: 0.5938
## F-statistic: 22.93 on 3 and 42 DF, p-value: 5.987e-09
scc_cases_dem_model_2 <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, scc_curr_cases_dem)
summary(scc_cases_dem_model_2)
##
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant`, data = scc_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0015811 -0.0005093 -0.0000437 0.0003465 0.0034956
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.483e-04 8.118e-04 0.183 0.85581
## percent_under_125000 -4.015e-06 1.269e-05 -0.316 0.75295
## avg_household_size 1.927e-04 2.297e-04 0.839 0.40557
## `percent more than 1 occupant` 9.362e-05 3.205e-05 2.921 0.00519 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007959 on 51 degrees of freedom
## Multiple R-squared: 0.3266, Adjusted R-squared: 0.287
## F-statistic: 8.245 on 3 and 51 DF, p-value: 0.0001432
Log of cases.
ac_cases_dem_model_2_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, ac_curr_cases_dem)
summary(ac_cases_dem_model_2_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant`, data = ac_curr_cases_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.80150 -0.28202 -0.01959 0.29973 1.37823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.511209 1.019415 -10.311 4.46e-13 ***
## percent_under_125000 0.030664 0.008782 3.492 0.00114 **
## avg_household_size 0.676370 0.287973 2.349 0.02362 *
## `percent more than 1 occupant` 0.006504 0.035247 0.185 0.85448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5421 on 42 degrees of freedom
## Multiple R-squared: 0.5922, Adjusted R-squared: 0.5631
## F-statistic: 20.33 on 3 and 42 DF, p-value: 2.708e-08
scc_cases_dem_model_2_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, scc_curr_cases_dem %>% filter(cases != 0))
summary(scc_cases_dem_model_2_log)
##
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant`, data = scc_curr_cases_dem %>%
## filter(cases != 0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02152 -0.23485 -0.03309 0.27829 1.60619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.390980 0.540434 -15.526 <2e-16 ***
## percent_under_125000 0.015706 0.008808 1.783 0.0815 .
## avg_household_size 0.255588 0.158128 1.616 0.1132
## `percent more than 1 occupant` 0.007314 0.021760 0.336 0.7384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4732 on 44 degrees of freedom
## Multiple R-squared: 0.3317, Adjusted R-squared: 0.2861
## F-statistic: 7.278 on 3 and 44 DF, p-value: 0.0004564
I will use 0.0004 cases/person as the baseline level, and look at change in cases 2 weeks after achieving that rate. Also get visits in this range too, using a 7 day lag on visits.
min_baseline_cases = 10
init_baseline_cases_by_pop = 0.0004
init_weeks_later = 2
init_lag_visits = 7
# load the visits data
ac_daily_visits_zip <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_03-09-20_05-24-20.rds"))
ac_cases_cutoff <- ac_cases_zip %>%
filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
filter(cases_by_pop >= init_baseline_cases_by_pop) %>%
mutate(log_cases_by_pop = log(cases_by_pop))
zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
if ((max(curr_vals$date) >= curr_vals$date[1] + init_weeks_later*7 + 1) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + init_weeks_later*7 + 1 - init_lag_visits) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - init_lag_visits)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[init_weeks_later*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[init_weeks_later*7 + 1] - curr_vals$log_cases_by_pop[1]
# get the total visits in the time range of interest
visits_curr = ac_daily_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - init_lag_visits) & date < (curr_vals$date[init_weeks_later*7 + 1] - init_lag_visits)) %>%
summarize(total_visits_high = sum(total_visits_high),
total_visits_low = sum(total_visits_low)) %>%
mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
total_visits_curr <- visits_curr$total_visits_avg
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
}
# combine
ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip)) %>%
mutate(visits_per_capita = total_visits / total_pop_zip)
# get linear model values for cases and visits
ac_visits_cases_change_model <- lm(later_cases_change ~ visits_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~visits_per_capita, y = fitted(lm(later_cases_change ~ visits_per_capita, ac_data_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in cases per person from baseline 0.0004'), title = "Alameda, 2 weeks after baseline")
summary(ac_visits_cases_change_model)
##
## Call:
## lm(formula = later_cases_change ~ visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.761e-04 -1.728e-04 -6.878e-05 2.618e-04 6.193e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0004317 0.0003538 -1.220 0.2311
## visits_per_capita 0.0000898 0.0000393 2.285 0.0289 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002873 on 33 degrees of freedom
## Multiple R-squared: 0.1366, Adjusted R-squared: 0.1104
## F-statistic: 5.22 on 1 and 33 DF, p-value: 0.02888
# log
ac_visits_cases_change_model_log <- lm(later_log_cases_change ~ visits_per_capita, ac_data_change)
ac_data_change %>%
plot_ly() %>%
add_trace(x = ~visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~visits_per_capita, y = fitted(lm(later_log_cases_change ~ visits_per_capita, ac_data_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_log)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in log(cases per person) from baseline 0.0004'), title = "Alameda, 2 weeks after baseline")
summary(ac_visits_cases_change_model_log)
##
## Call:
## lm(formula = later_log_cases_change ~ visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60722 -0.21542 -0.01093 0.25297 0.61235
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4657 0.4177 -1.115 0.2730
## visits_per_capita 0.1120 0.0464 2.415 0.0215 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3392 on 33 degrees of freedom
## Multiple R-squared: 0.1501, Adjusted R-squared: 0.1244
## F-statistic: 5.83 on 1 and 33 DF, p-value: 0.02146
Repeat the demographic variable correlations. Without plots this time to reduce space.
ac_data_change <- ac_data_change %>% left_join(ac_curr_cases_dem %>% dplyr::select(-c(date, cases, cases_by_pop, total_pop_zip, geometry)))
runCorrelationsAl <- function(ac_data_change) {
# household size
ac_cases_change_hhs_model <- lm(later_cases_change~ avg_household_size, ac_data_change)
print(summary(ac_cases_change_hhs_model))
# also do with log of cases
ac_cases_change_hhs_model_log <- lm(later_log_cases_change~ avg_household_size, ac_data_change)
print(summary(ac_cases_change_hhs_model_log))
# population density
ac_cases_change_pop_dens_model <- lm(later_cases_change~ pop_density, ac_data_change)
print(summary(ac_cases_change_pop_dens_model))
# also do with log of cases
ac_cases_change_pop_dens_model_log <- lm(later_log_cases_change~ pop_density, ac_data_change)
print(summary(ac_cases_change_pop_dens_model_log))
# occupants per room
ac_cases_change_occ_1_model <- lm(later_cases_change~ `percent more than 1 occupant`, ac_data_change)
print(summary(ac_cases_change_occ_1_model))
# also do with log of cases
ac_cases_change_occ_1_model_log <- lm(later_log_cases_change~ `percent more than 1 occupant`, ac_data_change)
print(summary(ac_cases_change_occ_1_model_log))
# more than 0.5 occupants
ac_cases_change_occ_0.5_model <- lm(later_cases_change~ `percent more than 0.5 occupants`, ac_data_change)
print(summary(ac_cases_change_occ_0.5_model))
# also do with log of cases
ac_cases_change_occ_0.5_model_log <- lm(later_log_cases_change~ `percent more than 0.5 occupants`, ac_data_change)
print(summary(ac_cases_change_occ_0.5_model_log))
# units in structure
ac_cases_change_units_10_model <- lm(later_cases_change~ `percent 10 or more units`, ac_data_change)
print(summary(ac_cases_change_units_10_model))
# also do with log of cases
ac_cases_change_units_10_model_log <- lm(later_log_cases_change~ `percent 10 or more units`, ac_data_change)
print(summary(ac_cases_change_units_10_model_log))
# more than 1 unit
ac_cases_change_units_1_model <- lm(later_cases_change~ `percent more than 1 unit`, ac_data_change)
print(summary(ac_cases_change_units_1_model))
# also do with log of cases
ac_cases_change_units_1_model_log <- lm(later_log_cases_change~ `percent more than 1 unit`, ac_data_change)
print(summary(ac_cases_change_units_1_model_log))
# income
ac_cases_change_inc_75_model <- lm(later_cases_change~ percent_under_75000, ac_data_change)
print(summary(ac_cases_change_inc_75_model))
# also do with log of cases
ac_cases_change_inc_75_model_log <- lm(later_log_cases_change~ percent_under_75000, ac_data_change)
print(summary(ac_cases_change_inc_75_model_log))
ac_cases_change_inc_100_model <- lm(later_cases_change~ percent_under_100000, ac_data_change)
print(summary(ac_cases_change_inc_100_model))
# also do with log of cases
ac_cases_change_inc_100_model_log <- lm(later_log_cases_change~ percent_under_100000, ac_data_change)
print(summary(ac_cases_change_inc_100_model_log))
ac_cases_change_inc_125_model <- lm(later_cases_change~ percent_under_125000, ac_data_change)
print(summary(ac_cases_change_inc_125_model))
# also do with log of cases
ac_cases_change_inc_125_model_log <- lm(later_log_cases_change~ percent_under_125000, ac_data_change)
print(summary(ac_cases_change_inc_125_model_log))
}
runCorrelationsAl(ac_data_change)
##
## Call:
## lm(formula = later_cases_change ~ avg_household_size, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.156e-04 -2.063e-04 -4.247e-05 1.727e-04 5.461e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0004576 0.0003011 -1.52 0.13808
## avg_household_size 0.0002842 0.0001022 2.78 0.00891 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002783 on 33 degrees of freedom
## Multiple R-squared: 0.1898, Adjusted R-squared: 0.1652
## F-statistic: 7.728 on 1 and 33 DF, p-value: 0.008909
##
##
## Call:
## lm(formula = later_log_cases_change ~ avg_household_size, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58478 -0.26170 0.01507 0.25189 0.56696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4458 0.3587 -1.243 0.22266
## avg_household_size 0.3366 0.1218 2.764 0.00927 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3316 on 33 degrees of freedom
## Multiple R-squared: 0.188, Adjusted R-squared: 0.1634
## F-statistic: 7.639 on 1 and 33 DF, p-value: 0.00927
##
##
## Call:
## lm(formula = later_cases_change ~ pop_density, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.663e-04 -2.714e-04 -9.122e-05 2.716e-04 7.693e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003661 0.0000860 4.257 0.000161 ***
## pop_density 0.0011890 0.0271285 0.044 0.965306
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003092 on 33 degrees of freedom
## Multiple R-squared: 5.82e-05, Adjusted R-squared: -0.03024
## F-statistic: 0.001921 on 1 and 33 DF, p-value: 0.9653
##
##
## Call:
## lm(formula = later_log_cases_change ~ pop_density, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5225 -0.3242 -0.0900 0.3006 0.8031
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5217 0.1023 5.099 1.38e-05 ***
## pop_density 4.6313 32.2748 0.143 0.887
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3679 on 33 degrees of freedom
## Multiple R-squared: 0.0006236, Adjusted R-squared: -0.02966
## F-statistic: 0.02059 on 1 and 33 DF, p-value: 0.8868
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 occupant`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.332e-04 -1.693e-04 -7.690e-06 1.320e-04 5.607e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.081e-05 7.559e-05 1.069 0.293
## `percent more than 1 occupant` 3.637e-05 8.011e-06 4.540 7.1e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002426 on 33 degrees of freedom
## Multiple R-squared: 0.3845, Adjusted R-squared: 0.3658
## F-statistic: 20.61 on 1 and 33 DF, p-value: 7.104e-05
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 occupant`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50233 -0.21460 0.05337 0.19407 0.71268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.183581 0.088837 2.067 0.0467 *
## `percent more than 1 occupant` 0.044128 0.009414 4.687 4.62e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2851 on 33 degrees of freedom
## Multiple R-squared: 0.3997, Adjusted R-squared: 0.3815
## F-statistic: 21.97 on 1 and 33 DF, p-value: 4.622e-05
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 0.5 occupants`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.428e-04 -2.473e-04 -8.570e-06 2.242e-04 5.800e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.314e-04 1.992e-04 -0.660 0.5141
## `percent more than 0.5 occupants` 1.143e-05 4.419e-06 2.587 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000282 on 33 degrees of freedom
## Multiple R-squared: 0.1686, Adjusted R-squared: 0.1434
## F-statistic: 6.693 on 1 and 33 DF, p-value: 0.01427
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 0.5 occupants`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52310 -0.27526 -0.01809 0.28931 0.59815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.069643 0.236506 -0.294 0.770
## `percent more than 0.5 occupants` 0.013776 0.005246 2.626 0.013 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3347 on 33 degrees of freedom
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.1478
## F-statistic: 6.896 on 1 and 33 DF, p-value: 0.013
##
##
## Call:
## lm(formula = later_cases_change ~ `percent 10 or more units`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.013e-04 -2.548e-04 -8.722e-05 2.641e-04 7.579e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.014e-04 8.043e-05 4.991 1.89e-05 ***
## `percent 10 or more units` -1.976e-06 3.750e-06 -0.527 0.602
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003079 on 33 degrees of freedom
## Multiple R-squared: 0.008348, Adjusted R-squared: -0.0217
## F-statistic: 0.2778 on 1 and 33 DF, p-value: 0.6017
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent 10 or more units`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56708 -0.30077 -0.07125 0.28156 0.78729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.567209 0.095802 5.921 1.22e-06 ***
## `percent 10 or more units` -0.002069 0.004466 -0.463 0.646
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3668 on 33 degrees of freedom
## Multiple R-squared: 0.00646, Adjusted R-squared: -0.02365
## F-statistic: 0.2146 on 1 and 33 DF, p-value: 0.6463
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 unit`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.619e-04 -2.721e-04 -9.047e-05 2.704e-04 7.669e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.614e-04 1.102e-04 3.280 0.00246 **
## `percent more than 1 unit` 2.031e-07 2.557e-06 0.079 0.93716
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0003092 on 33 degrees of freedom
## Multiple R-squared: 0.0001912, Adjusted R-squared: -0.03011
## F-statistic: 0.006312 on 1 and 33 DF, p-value: 0.9372
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 unit`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51724 -0.32555 -0.09264 0.29872 0.79562
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5162009 0.1311077 3.937 0.000402 ***
## `percent more than 1 unit` 0.0004527 0.0030420 0.149 0.882596
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3679 on 33 degrees of freedom
## Multiple R-squared: 0.0006707, Adjusted R-squared: -0.02961
## F-statistic: 0.02215 on 1 and 33 DF, p-value: 0.8826
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_75000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.950e-04 -1.698e-04 -8.015e-05 2.037e-04 5.911e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.869e-05 1.333e-04 -0.290 0.77345
## percent_under_75000 9.459e-06 2.907e-06 3.254 0.00263 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002691 on 33 degrees of freedom
## Multiple R-squared: 0.2429, Adjusted R-squared: 0.22
## F-statistic: 10.59 on 1 and 33 DF, p-value: 0.002628
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_75000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51572 -0.22338 -0.05969 0.26202 0.74802
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.041832 0.157987 0.265 0.79283
## percent_under_75000 0.011402 0.003445 3.310 0.00227 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3189 on 33 degrees of freedom
## Multiple R-squared: 0.2492, Adjusted R-squared: 0.2265
## F-statistic: 10.96 on 1 and 33 DF, p-value: 0.002265
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_100000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.992e-04 -1.651e-04 -3.765e-05 1.924e-04 6.103e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.839e-04 1.568e-04 -1.172 0.249400
## percent_under_100000 1.011e-05 2.751e-06 3.674 0.000841 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002605 on 33 degrees of freedom
## Multiple R-squared: 0.2903, Adjusted R-squared: 0.2688
## F-statistic: 13.5 on 1 and 33 DF, p-value: 0.0008407
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_100000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50882 -0.20316 -0.04563 0.24008 0.77156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.134148 0.185533 -0.723 0.474749
## percent_under_100000 0.012199 0.003254 3.749 0.000683 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3082 on 33 degrees of freedom
## Multiple R-squared: 0.2987, Adjusted R-squared: 0.2774
## F-statistic: 14.05 on 1 and 33 DF, p-value: 0.0006825
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.983e-04 -1.886e-04 -3.339e-05 1.615e-04 5.976e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.364e-04 1.906e-04 -1.765 0.086766 .
## percent_under_125000 1.090e-05 2.866e-06 3.803 0.000586 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002578 on 33 degrees of freedom
## Multiple R-squared: 0.3047, Adjusted R-squared: 0.2837
## F-statistic: 14.46 on 1 and 33 DF, p-value: 0.0005862
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.47456 -0.20218 -0.06555 0.23659 0.75644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.318931 0.225242 -1.416 0.166161
## percent_under_125000 0.013168 0.003388 3.887 0.000463 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3048 on 33 degrees of freedom
## Multiple R-squared: 0.3141, Adjusted R-squared: 0.2933
## F-statistic: 15.11 on 1 and 33 DF, p-value: 0.0004631
Try multiple regression analysis with these.
ac_cases_dem_model_visits <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change)
summary(ac_cases_dem_model_visits)
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit` +
## visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.055e-04 -1.512e-04 -2.944e-05 1.379e-04 6.612e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.451e-04 9.952e-04 -0.146 0.8851
## percent_under_125000 1.205e-05 4.409e-06 2.734 0.0107 *
## avg_household_size 1.204e-06 3.133e-04 0.004 0.9970
## pop_density -4.907e-02 3.133e-02 -1.567 0.1284
## `percent more than 1 occupant` 2.201e-05 2.407e-05 0.915 0.3683
## `percent more than 1 unit` -2.943e-06 5.490e-06 -0.536 0.5962
## visits_per_capita -2.341e-05 4.700e-05 -0.498 0.6224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0002302 on 28 degrees of freedom
## Multiple R-squared: 0.5297, Adjusted R-squared: 0.429
## F-statistic: 5.257 on 6 and 28 DF, p-value: 0.0009801
ac_cases_dem_model_visits_log <- lm(later_log_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change)
summary(ac_cases_dem_model_visits_log)
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 +
## avg_household_size + pop_density + `percent more than 1 occupant` +
## `percent more than 1 unit` + visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35512 -0.19725 -0.04894 0.16358 0.81897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.079377 1.182353 -0.067 0.947
## percent_under_125000 0.013729 0.005238 2.621 0.014 *
## avg_household_size -0.032296 0.372196 -0.087 0.931
## pop_density -52.065401 37.215592 -1.399 0.173
## `percent more than 1 occupant` 0.028072 0.028599 0.982 0.335
## `percent more than 1 unit` -0.003730 0.006522 -0.572 0.572
## visits_per_capita -0.014785 0.055838 -0.265 0.793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2735 on 28 degrees of freedom
## Multiple R-squared: 0.5313, Adjusted R-squared: 0.4309
## F-statistic: 5.29 on 6 and 28 DF, p-value: 0.00094
Huh, again interesting.
Just include the variables that were relevant before.
ac_cases_dem_model_visits_2 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change)
summary(ac_cases_dem_model_visits_2)
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant` + visits_per_capita, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0003573 -0.0001666 -0.0000494 0.0001466 0.0006220
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.940e-04 5.188e-04 -1.916 0.0650 .
## percent_under_125000 9.605e-06 4.235e-06 2.268 0.0307 *
## avg_household_size 2.392e-04 1.540e-04 1.553 0.1308
## `percent more than 1 occupant` 4.693e-06 1.589e-05 0.295 0.7698
## visits_per_capita 9.312e-07 4.520e-05 0.021 0.9837
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000234 on 30 degrees of freedom
## Multiple R-squared: 0.4793, Adjusted R-squared: 0.4099
## F-statistic: 6.903 on 4 and 30 DF, p-value: 0.0004594
ac_cases_dem_model_visits_log_2 <- lm(later_log_cases_change ~percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change)
summary(ac_cases_dem_model_visits_log_2)
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 +
## avg_household_size + `percent more than 1 occupant` + visits_per_capita,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37323 -0.20158 -0.06058 0.18980 0.77190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.074013 0.611642 -1.756 0.0893 .
## percent_under_125000 0.011095 0.004993 2.222 0.0340 *
## avg_household_size 0.250766 0.181552 1.381 0.1774
## `percent more than 1 occupant` 0.007669 0.018736 0.409 0.6852
## visits_per_capita 0.011114 0.053290 0.209 0.8362
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2759 on 30 degrees of freedom
## Multiple R-squared: 0.489, Adjusted R-squared: 0.4208
## F-statistic: 7.177 on 4 and 30 DF, p-value: 0.0003527
Try also with 5 week change in cases after the baseline level.
init_weeks_later = 5
# get the change in cases and total visit hours in the time period of interest
later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
for (i in 1:length(zipcodes_in_cutoff)) {
curr_zip <- zipcodes_in_cutoff[i]
curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
# first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well
change_cases_curr = NA
change_log_cases_curr = NA
total_visits_curr = NA
# check cases first
if ((max(curr_vals$date) >= curr_vals$date[1] + init_weeks_later*7 + 1)) {
# change in cases an amount of time later
change_cases_curr = curr_vals$cases_by_pop[init_weeks_later*7 + 1] - curr_vals$cases_by_pop[1]
change_log_cases_curr = curr_vals$log_cases_by_pop[init_weeks_later*7 + 1] - curr_vals$log_cases_by_pop[1]
}
# check visits
if ((max(ac_daily_visits_zip$date) >= curr_vals$date[1] + init_weeks_later*7 + 1 - init_lag_visits) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - init_lag_visits)) {
# get the total visits in the time range of interest
visits_curr = ac_daily_visits_zip %>%
filter(zipcode == curr_zip) %>%
filter(date >= (curr_vals$date[1] - init_lag_visits) & date < (curr_vals$date[init_weeks_later*7 + 1] - init_lag_visits)) %>%
summarize(total_visits_high = sum(total_visits_high),
total_visits_low = sum(total_visits_low)) %>%
mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
total_visits_curr <- visits_curr$total_visits_avg
}
later_cases_change[i] = change_cases_curr
later_log_cases_change[i] = change_log_cases_curr
total_visits[i] = total_visits_curr
}
# combine
ac_data_change_5 <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, stringsAsFactors = F) %>%
filter(!is.na(later_cases_change)) %>%
rename(zipcode = zipcodes_in_cutoff) %>%
left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip)) %>%
mutate(visits_per_capita = total_visits / total_pop_zip)
# get linear model values for cases and visits
ac_visits_cases_change_model_5 <- lm(later_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
ac_data_change_5 %>% filter(!is.na(visits_per_capita)) %>%
plot_ly() %>%
add_trace(x = ~visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~visits_per_capita, y = fitted(lm(later_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_5)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in cases per person from baseline 0.0004'), title = "Alameda, 5 weeks after baseline")
summary(ac_visits_cases_change_model_5)
##
## Call:
## lm(formula = later_cases_change ~ visits_per_capita, data = ac_data_change_5 %>%
## filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0010994 -0.0005514 -0.0001878 0.0002261 0.0025410
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0016203 0.0011815 -1.371 0.1811
## visits_per_capita 0.0001196 0.0000512 2.337 0.0268 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008618 on 28 degrees of freedom
## Multiple R-squared: 0.1632, Adjusted R-squared: 0.1333
## F-statistic: 5.46 on 1 and 28 DF, p-value: 0.02683
# log
ac_visits_cases_change_model_5_log <- lm(later_log_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
ac_data_change_5 %>% filter(!is.na(visits_per_capita)) %>%
plot_ly() %>%
add_trace(x = ~visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
add_trace(x = ~visits_per_capita, y = fitted(lm(later_log_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_5_log)$r.squared)) %>%
layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in log(cases/person) from baseline 0.0004'), title = "Alameda, 5 weeks after baseline")
summary(ac_visits_cases_change_model_5_log)
##
## Call:
## lm(formula = later_log_cases_change ~ visits_per_capita, data = ac_data_change_5 %>%
## filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85287 -0.31588 -0.06965 0.30249 1.12272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.91402 0.67267 -1.359 0.18506
## visits_per_capita 0.08820 0.02915 3.026 0.00527 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4906 on 28 degrees of freedom
## Multiple R-squared: 0.2464, Adjusted R-squared: 0.2195
## F-statistic: 9.155 on 1 and 28 DF, p-value: 0.00527
Correlations analysis.
ac_data_change_5 <- ac_data_change_5 %>% left_join(ac_curr_cases_dem %>% dplyr::select(-c(date, cases, cases_by_pop, total_pop_zip, geometry)))
runCorrelationsAl(ac_data_change_5)
##
## Call:
## lm(formula = later_cases_change ~ avg_household_size, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0011939 -0.0006046 -0.0001743 0.0003696 0.0024225
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0010847 0.0009633 -1.126 0.2691
## avg_household_size 0.0007360 0.0003260 2.258 0.0314 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008647 on 30 degrees of freedom
## Multiple R-squared: 0.1452, Adjusted R-squared: 0.1167
## F-statistic: 5.097 on 1 and 30 DF, p-value: 0.03141
##
##
## Call:
## lm(formula = later_log_cases_change ~ avg_household_size, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98839 -0.31056 -0.09891 0.41259 1.12215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3039 0.5822 -0.522 0.6054
## avg_household_size 0.4688 0.1970 2.379 0.0239 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5226 on 30 degrees of freedom
## Multiple R-squared: 0.1587, Adjusted R-squared: 0.1307
## F-statistic: 5.661 on 1 and 30 DF, p-value: 0.02391
##
##
## Call:
## lm(formula = later_cases_change ~ pop_density, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0008933 -0.0007415 -0.0001685 0.0003240 0.0025253
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0008821 0.0002707 3.259 0.00278 **
## pop_density 0.0713886 0.0853960 0.836 0.40978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009245 on 30 degrees of freedom
## Multiple R-squared: 0.02276, Adjusted R-squared: -0.00981
## F-statistic: 0.6988 on 1 and 30 DF, p-value: 0.4098
##
##
## Call:
## lm(formula = later_log_cases_change ~ pop_density, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74996 -0.48368 0.00446 0.29408 1.11668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9346 0.1642 5.693 3.3e-06 ***
## pop_density 51.0826 51.7917 0.986 0.332
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5607 on 30 degrees of freedom
## Multiple R-squared: 0.03141, Adjusted R-squared: -0.0008781
## F-statistic: 0.9728 on 1 and 30 DF, p-value: 0.3319
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 occupant`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0010973 -0.0003509 -0.0001076 0.0002054 0.0023817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.392e-05 2.189e-04 0.109 0.914
## `percent more than 1 occupant` 1.263e-04 2.259e-05 5.591 4.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0006545 on 30 degrees of freedom
## Multiple R-squared: 0.5102, Adjusted R-squared: 0.4939
## F-statistic: 31.25 on 1 and 30 DF, p-value: 4.4e-06
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 occupant`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5820 -0.2072 -0.0329 0.1457 1.2277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.40512 0.12740 3.18 0.00341 **
## `percent more than 1 occupant` 0.08007 0.01315 6.09 1.09e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.381 on 30 degrees of freedom
## Multiple R-squared: 0.5528, Adjusted R-squared: 0.5379
## F-statistic: 37.08 on 1 and 30 DF, p-value: 1.086e-06
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 0.5 occupants`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0011819 -0.0005668 -0.0001210 0.0002660 0.0024531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.059e-04 6.350e-04 -1.112 0.2751
## `percent more than 0.5 occupants` 3.992e-05 1.395e-05 2.862 0.0076 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008289 on 30 degrees of freedom
## Multiple R-squared: 0.2145, Adjusted R-squared: 0.1883
## F-statistic: 8.192 on 1 and 30 DF, p-value: 0.007598
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 0.5 occupants`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76471 -0.29498 -0.05544 0.29712 1.29441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.139909 0.373492 -0.375 0.71060
## `percent more than 0.5 occupants` 0.027172 0.008204 3.312 0.00242 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4875 on 30 degrees of freedom
## Multiple R-squared: 0.2677, Adjusted R-squared: 0.2433
## F-statistic: 10.97 on 1 and 30 DF, p-value: 0.002423
##
##
## Call:
## lm(formula = later_cases_change ~ `percent 10 or more units`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0010631 -0.0006383 -0.0001908 0.0002794 0.0027544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.185e-03 2.560e-04 4.627 6.66e-05 ***
## `percent 10 or more units` -7.231e-06 1.162e-05 -0.622 0.538
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009292 on 30 degrees of freedom
## Multiple R-squared: 0.01274, Adjusted R-squared: -0.02017
## F-statistic: 0.3872 on 1 and 30 DF, p-value: 0.5385
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent 10 or more units`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8909 -0.4010 -0.0199 0.3763 1.2849
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.124364 0.156309 7.193 5.27e-08 ***
## `percent 10 or more units` -0.003591 0.007094 -0.506 0.616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5673 on 30 degrees of freedom
## Multiple R-squared: 0.00847, Adjusted R-squared: -0.02458
## F-statistic: 0.2563 on 1 and 30 DF, p-value: 0.6164
##
##
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 unit`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0009389 -0.0006809 -0.0001951 0.0002303 0.0027043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.958e-04 3.559e-04 2.517 0.0174 *
## `percent more than 1 unit` 4.272e-06 8.087e-06 0.528 0.6012
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0009309 on 30 degrees of freedom
## Multiple R-squared: 0.009214, Adjusted R-squared: -0.02381
## F-statistic: 0.279 on 1 and 30 DF, p-value: 0.6012
##
##
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 unit`,
## data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78978 -0.44547 0.00088 0.28860 1.24165
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.936981 0.216271 4.332 0.000152 ***
## `percent more than 1 unit` 0.003246 0.004914 0.661 0.513895
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5656 on 30 degrees of freedom
## Multiple R-squared: 0.01434, Adjusted R-squared: -0.01852
## F-statistic: 0.4364 on 1 and 30 DF, p-value: 0.5139
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_75000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0011503 -0.0005204 -0.0001303 0.0002934 0.0019030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.723e-04 4.020e-04 -1.175 0.249313
## percent_under_75000 3.474e-05 8.587e-06 4.045 0.000337 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007523 on 30 degrees of freedom
## Multiple R-squared: 0.353, Adjusted R-squared: 0.3314
## F-statistic: 16.36 on 1 and 30 DF, p-value: 0.0003371
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_75000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83740 -0.31902 -0.05253 0.18362 0.92196
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.052022 0.233235 0.223 0.825
## percent_under_75000 0.022896 0.004982 4.596 7.27e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4364 on 30 degrees of freedom
## Multiple R-squared: 0.4132, Adjusted R-squared: 0.3936
## F-statistic: 21.13 on 1 and 30 DF, p-value: 7.268e-05
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_100000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0010894 -0.0005129 -0.0001348 0.0002288 0.0019060
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.160e-04 4.751e-04 -1.928 0.063350 .
## percent_under_100000 3.538e-05 8.173e-06 4.329 0.000153 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007337 on 30 degrees of freedom
## Multiple R-squared: 0.3845, Adjusted R-squared: 0.364
## F-statistic: 18.74 on 1 and 30 DF, p-value: 0.0001534
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_100000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80272 -0.28174 -0.04411 0.17096 0.92320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.256988 0.270652 -0.950 0.35
## percent_under_100000 0.023618 0.004656 5.072 1.9e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.418 on 30 degrees of freedom
## Multiple R-squared: 0.4617, Adjusted R-squared: 0.4437
## F-statistic: 25.73 on 1 and 30 DF, p-value: 1.9e-05
##
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0009168 -0.0004999 -0.0001158 0.0001881 0.0020116
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.347e-03 5.854e-04 -2.301 0.028526 *
## percent_under_125000 3.661e-05 8.669e-06 4.223 0.000206 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0007406 on 30 degrees of freedom
## Multiple R-squared: 0.3728, Adjusted R-squared: 0.3519
## F-statistic: 17.83 on 1 and 30 DF, p-value: 0.0002062
##
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000, data = ac_data_change)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69702 -0.25095 -0.05153 0.15852 0.98442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.592856 0.326298 -1.817 0.0792 .
## percent_under_125000 0.025171 0.004833 5.209 1.29e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4129 on 30 degrees of freedom
## Multiple R-squared: 0.4749, Adjusted R-squared: 0.4574
## F-statistic: 27.13 on 1 and 30 DF, p-value: 1.292e-05
Try multiple regression analysis with these, filtering to valid visits data ranges.
ac_cases_dem_model_visits_5 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
summary(ac_cases_dem_model_visits_5)
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size +
## pop_density + `percent more than 1 occupant` + `percent more than 1 unit` +
## visits_per_capita, data = ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0009177 -0.0003691 -0.0001696 0.0001847 0.0017177
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.168e-03 2.960e-03 1.070 0.2956
## percent_under_125000 2.169e-05 1.454e-05 1.492 0.1492
## avg_household_size -1.288e-03 9.857e-04 -1.307 0.2042
## pop_density -8.309e-02 1.008e-01 -0.824 0.4182
## `percent more than 1 occupant` 1.784e-04 7.317e-05 2.438 0.0229 *
## `percent more than 1 unit` -2.438e-05 1.692e-05 -1.441 0.1630
## visits_per_capita -3.594e-06 6.854e-05 -0.052 0.9586
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000672 on 23 degrees of freedom
## Multiple R-squared: 0.582, Adjusted R-squared: 0.473
## F-statistic: 5.338 on 6 and 23 DF, p-value: 0.001409
ac_cases_dem_model_visits_log_5 <- lm(later_log_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
summary(ac_cases_dem_model_visits_log_5)
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 +
## avg_household_size + pop_density + `percent more than 1 occupant` +
## `percent more than 1 unit` + visits_per_capita, data = ac_data_change_5 %>%
## filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59625 -0.20752 -0.04997 0.09367 0.86935
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.166444 1.606104 0.726 0.4750
## percent_under_125000 0.016656 0.007887 2.112 0.0458 *
## avg_household_size -0.643489 0.534877 -1.203 0.2412
## pop_density -39.979506 54.701297 -0.731 0.4722
## `percent more than 1 occupant` 0.083776 0.039707 2.110 0.0460 *
## `percent more than 1 unit` -0.011903 0.009181 -1.296 0.2077
## visits_per_capita 0.024724 0.037191 0.665 0.5128
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3646 on 23 degrees of freedom
## Multiple R-squared: 0.6581, Adjusted R-squared: 0.5689
## F-statistic: 7.378 on 6 and 23 DF, p-value: 0.0001733
Just include the variables that were relevant before.
ac_cases_dem_model_visits_5_2 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
summary(ac_cases_dem_model_visits_5_2)
##
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size +
## `percent more than 1 occupant` + visits_per_capita, data = ac_data_change_5 %>%
## filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0008467 -0.0004358 -0.0001939 0.0002918 0.0022880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.066e-03 1.552e-03 -0.687 0.4984
## percent_under_125000 1.580e-05 1.362e-05 1.160 0.2569
## avg_household_size 5.777e-07 5.367e-04 0.001 0.9991
## `percent more than 1 occupant` 8.892e-05 4.998e-05 1.779 0.0874 .
## visits_per_capita 1.594e-05 6.370e-05 0.250 0.8045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0006881 on 25 degrees of freedom
## Multiple R-squared: 0.5237, Adjusted R-squared: 0.4475
## F-statistic: 6.871 on 4 and 25 DF, p-value: 0.0007104
ac_cases_dem_model_visits_log_5_2 <- lm(later_log_cases_change ~percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
summary(ac_cases_dem_model_visits_log_5_2)
##
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 +
## avg_household_size + `percent more than 1 occupant` + visits_per_capita,
## data = ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65564 -0.14940 -0.06488 0.06747 1.14641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.896615 0.831895 -1.078 0.2914
## percent_under_125000 0.013816 0.007301 1.892 0.0701 .
## avg_household_size -0.014984 0.287671 -0.052 0.9589
## `percent more than 1 occupant` 0.040170 0.026790 1.499 0.1463
## visits_per_capita 0.034096 0.034144 0.999 0.3276
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3688 on 25 degrees of freedom
## Multiple R-squared: 0.6198, Adjusted R-squared: 0.5589
## F-statistic: 10.19 on 4 and 25 DF, p-value: 4.927e-05